# This script opens each election-study dataset and estimates issue voting with a regression approach, controlling for party identification (pid).
library(foreign)
library(nnet)
library(aod)
library(ggplot2)

# Set working directory to project directory.
# NOTE: `wd` is not defined in this script; it must already exist in the
# session (it is also used when the result is saved at the end of the script).
setwd(wd)

# Load policy implementation dataset
dat <- read.csv("data/implementationdata.csv")

# Get vector of election years
election_year <- c(1956, 1960, 1964, 1968, 1973, 1976, 1979, 1982,
                   1985, 1988, 1991, 1994, 1998, 2002, 2006, 2010, 2014)

# Only keep VU (Swedish national election study).
# which() is used so that rows with a missing `survey` value are dropped;
# a bare logical index would turn each of them into a row made entirely of NAs.
dat <- dat[which(dat$survey == "vu"), ]

# Set working directory to directory with election datasets
# (relative to the project directory set above)
setwd("data/SNES/")

# Estimate issue voting for a set of governing parties.
#
# Fits a linear probability model of the post-election vote for any party in
# `g` on the issue opinion, controlling for party identification, ideology and
# the pre-election vote, and returns the estimate for `opinion`.
#
# Arguments:
#   g    - character vector of party names that count as "government".
#   data - data frame with columns opinion, ideology, pid, vote1 and vote2.
#          Defaults to the object `df` found in the calling session, which is
#          how the main loop of this script passes its data.
#
# Returns a list with
#   sig     - 1 if the opinion coefficient has p < 0.05, otherwise 0
#   changes - the opinion coefficient
#   se      - its standard error
# All three are NA when the model cannot be fitted or when `opinion` is
# dropped from the fit (e.g. no variation among complete cases).
changesfungov <- function(g, data = df) {
  # Dichotomous indicators: 1 if the value is one of the parties in g, else 0.
  # %in% never returns NA, so missing values are coded 0.
  data$v2 <- as.numeric(data$vote2 %in% g) # post-election vote
  data$v1 <- as.numeric(data$vote1 %in% g) # pre-election vote
  data$p <- as.numeric(data$pid %in% g)    # party identification

  failed <- list(sig = NA_real_, changes = NA_real_, se = NA_real_)

  coefs <- tryCatch(
    summary(lm(v2 ~ opinion + p + ideology + v1, data = data))$coefficients,
    error = function(e) NULL
  )

  # summary.lm() leaves aliased coefficients out of the table, so the opinion
  # row has to be looked up by name; taking "row 2" would silently return the
  # party-id coefficient whenever opinion is dropped.
  if (is.null(coefs) || !"opinion" %in% rownames(coefs)) {
    return(failed)
  }

  p_value <- unname(coefs["opinion", 4])
  list(
    sig = as.numeric(p_value < 0.05),
    changes = unname(coefs["opinion", 1]),
    se = unname(coefs["opinion", 2])
  )
}


# Add the result columns to the implementation data, all starting out missing.
# They are filled row by row while the election datasets are processed.
for (result_col in c("totalchanges", "iv_changes_gov", "iv_changes_gov_sig",
                     "iv_changes_gov_se", "govsupport", "govsupport_post")) {
  dat[[result_col]] <- NA
}

# List the files in the current directory in sorted order and keep the first 17
sorted_files <- sort(list.files())
datasets <- sorted_files[1:17]
# Put entries 7 onward first and entries 1-3 last (entries 4-6 are left out),
# which moves the 2000s to the end of the vector.
later_files <- datasets[7:length(datasets)]
first_files <- datasets[1:3]
datasets <- c(later_files, first_files)
# Drop the second entry: 1970, because there was no panel that year
datasets <- datasets[-2]

library(readstata13) # Necessary to open datasets

# Numeric party codes used in the election datasets and the party names they
# are recoded to. Codes 40 and 88 are both recoded to "dk".
party_codes <- c("11" = "socdem", "12" = "left", "13" = "green",
                 "21" = "mod", "22" = "lib", "23" = "center",
                 "24" = "chrisdem", "31" = "swedem", "32" = "other",
                 "40" = "dk", "88" = "dk")

# Convert a party variable to a factor whose levels are party names.
# Levels that are neither a known numeric code nor already one of the party
# names are set to NA.
recode_party <- function(x) {
  f <- as.factor(x)
  lev <- levels(f)
  known <- lev %in% names(party_codes)
  lev[known] <- party_codes[lev[known]]
  lev[!lev %in% party_codes] <- NA
  levels(f) <- lev # duplicated names ("dk") are merged into one level
  f
}

# Parties counted as the government for a given election year, or NULL if the
# year is in none of the lists.
governing_parties <- function(year) {
  if (year %in% c(1936:1973, 1982:1988, 1994:2002, 2014)) {
    "socdem"
  } else if (year %in% c(1976, 1979)) {
    c("center", "mod", "lib")
  } else if (year %in% c(1991, 2006, 2010)) {
    c("center", "mod", "lib", "chrisdem")
  } else {
    NULL
  }
}

# Columns that are filled by row index below must exist beforehand
for (new_col in c("votechange", "policy", "years")) {
  if (!new_col %in% names(dat)) {
    dat[[new_col]] <- NA
  }
}

# NOTE(review): the loop starts at the 4th dataset, so the first three entries
# of `datasets` are never processed -- confirm this is intentional.
for (i in 4:length(datasets)) { # Loop through datasets
  d <- read.dta13(datasets[i])
  yy <- substr(datasets[i], 1, 2) # Two-digit year taken from the file name

  # Recode pre (vote1) and post (vote2) vote to party names and keep only
  # cases with non-missing values on both
  d$vote1 <- recode_party(d$vote1)
  d$vote2 <- recode_party(d$vote2)
  d <- d[!is.na(d$vote1) & !is.na(d$vote2), ]

  # Party identification: use pid1 and fall back on pid2 where pid1 is missing.
  # Unknown pid2 codes are set to NA here as well; this cannot change the
  # estimates because pid is only ever compared with the governing parties.
  d$pid1 <- as.character(recode_party(d$pid1))
  d$pid2 <- as.character(recode_party(d$pid2))
  pid <- d$pid1
  no_pid1 <- is.na(pid)
  pid[no_pid1] <- d$pid2[no_pid1]
  d$pid <- as.factor(pid)

  # Issue variables are columns whose names start with upper or lower case v
  # followed by a digit. In the policy implementation dataset they are named
  # vu + two-digit year + underscore + the V/v name.
  issue_cols <- grep("^[Vv][0-9]", colnames(d), value = TRUE)
  vars <- if (length(issue_cols) > 0) {
    paste0("vu", yy, "_", issue_cols)
  } else {
    character(0) # paste0() would otherwise return the bogus name "vuYY_"
  }

  # Full four-digit year: two-digit values above 10 are treated as 19xx
  year <- as.numeric(paste0(if (as.numeric(yy) > 10) "19" else "20", yy))

  # Make sure all levels are in both factors
  all_parties <- unique(c(levels(d$vote1), levels(d$vote2)))
  d$vote1 <- factor(d$vote1, levels = all_parties)
  d$vote2 <- factor(d$vote2, levels = all_parties)

  year_rows <- which(dat$year == year)

  # Total difference in vote shares between pre and post. Control variable.
  dat$votechange[year_rows] <-
    sum(abs(prop.table(table(d$vote2)) - prop.table(table(d$vote1))))

  # Change in the proportion supporting the governing parties (for coalitions,
  # all parties in government taken together). Control variable.
  gov_year <- governing_parties(year)
  if (!is.null(gov_year)) {
    dat$totalchanges[year_rows] <-
      mean(d$vote2 %in% gov_year) - mean(d$vote1 %in% gov_year)
  }

  # Loop through issue variables in the election dataset
  for (j in seq_along(vars)) {
    if (!any(dat$org_var == vars[j], na.rm = TRUE)) {
      # Name not found: flip the case of the leading v and try again
      parts <- strsplit(vars[j], "_")[[1]]
      stem <- parts[[2]]
      v <- substr(stem, 1, 1)
      v <- if (toupper(v) == v) tolower(v) else toupper(v)
      vars[j] <- paste0(parts[[1]], "_", v, substr(stem, 2, nchar(stem)))
    }

    rows <- which(dat$org_var == vars[j])
    if (length(rows) == 0) {
      next # Variable is not in the implementation dataset
    }

    issue_col <- issue_cols[j]

    # If the coding of the implementation variable was changed, reverse the
    # sign of the opinion variable (swap 1 and -1, leave other values alone)
    if (dat$switcher[rows] == 1) {
      before <- d[[issue_col]]
      d[[issue_col]][which(before == 1)] <- -1
      d[[issue_col]][which(before == -1)] <- 1
    }

    # Data frame with issue opinion and vote choice, read by changesfungov().
    # The factors are passed as they are: going through c() and re-attaching
    # the levels mislabels parties on R < 4.1 when a level is unused.
    o <- d[[issue_col]]
    df <- data.frame(opinion = c(o), ideology = c(d$ideology), pid = d$pid,
                     vote1 = d$vote1, vote2 = d$vote2)

    # Only continue if opinions are not all missing and take more than one value
    if (!(mean(!is.na(o)) > 0 && length(unique(o)) > 1)) {
      next
    }

    # Rescale opinions from 0 to 1 (0 -> 0.5 first, then -1 -> 0)
    df$opinion[which(df$opinion == 0)] <- 0.5
    df$opinion[which(df$opinion == -1)] <- 0
    # Rescale ideology from 0 to 1 and give NAs the median position
    ideology_range <- range(df$ideology, na.rm = TRUE)
    df$ideology <- (df$ideology - ideology_range[1]) /
      (ideology_range[2] - ideology_range[1])
    df$ideology[is.na(df$ideology)] <- median(df$ideology, na.rm = TRUE)

    # Issue voting for the governing parties
    g <- governing_parties(dat$year[rows])
    if (!is.null(g)) {
      modiv <- tryCatch(
        changesfungov(g),
        error = function(e) {
          message("Issue-voting model failed for ", vars[j], ": ",
                  conditionMessage(e))
          NULL
        }
      )
      if (is.null(modiv)) {
        # Record NA instead of stopping on a zero-length replacement
        modiv <- list(changes = NA, sig = NA, se = NA)
      }
      dat$iv_changes_gov[rows] <- modiv$changes
      dat$iv_changes_gov_sig[rows] <- modiv$sig
      dat$iv_changes_gov_se[rows] <- modiv$se

      # Proportion who support the change among supporters of govt parties:
      # ps_fvr_dk for social democratic governments, pallians_fvr_dk otherwise
      support <- if (identical(g, "socdem")) {
        dat$ps_fvr_dk
      } else {
        dat$pallians_fvr_dk
      }
      dat$govsupport[rows] <- support[rows]
      dat$govsupport_post[rows] <-
        mean(df$opinion[which(df$vote2 %in% g)] == 1, na.rm = TRUE)
    }

    iyear <- election_year[which(election_year == year) + 1] # Next election
    years <- iyear - year # Number of years till next election
    var <- paste0("year", 0:years, "policy") # All relevant policy variables
    # Code 1 if any of these policy variables takes the value 1
    dat$policy[rows] <-
      ifelse(mean(dat[rows, var] == 1, na.rm = TRUE) != 0, 1, 0)
    dat$years[rows] <- years # Record number of years between elections
  }
}

# Split the change in government support into gains and losses.
# Gains keep the positive changes (negative ones set to 0); losses keep the
# negative changes (positive ones set to 0). NAs stay NA.
dat$totalgains <- dat$totalchanges
dat$totalgains[which(dat$totalgains < 0)] <- 0
dat$totallosses <- dat$totalchanges
# This used to test `< 0`, which made totallosses a copy of totalgains
dat$totallosses[which(dat$totallosses > 0)] <- 0


# Multiply these measures by 100
dat$gw_fvr_dk <- dat$gw_fvr_dk * 100
dat$gw_fvr <- dat$gw_fvr * 100
dat$govsupport <- dat$govsupport * 100
dat$govsupport_post <- dat$govsupport_post * 100
# 0 for the years handled as social democratic governments, 1 for all others
dat$coalition <- ifelse(dat$year %in% c(1936:1973, 1982:1988, 1994:2002, 2014), 0, 1)

# These issues were removed
removed_issues <- c(
  "vu10_V843", "vu02_V270",             # Asked in the post survey
  "vu10_V571",                          # No data on policy implementation
  "vu70_V117", "vu70_V93", "vu70_V116", # Issues from 1970 and issues not in pre
  "vu70_V92", "vu70_V134", "vu10_V874"
)
# Filter with a logical mask rather than dat[-which(...), ]: when none of the
# names is present, which() returns integer(0) and negative indexing with it
# selects zero rows, i.e. it would throw away the whole dataset.
dat <- dat[!dat$org_var %in% removed_issues, ]

# Build the issue-voting measures and multiply them by 100
iv_raw <- dat$iv_changes_gov
iv_is_sig <- dat$iv_changes_gov_sig == 1

# Gains: significant negative estimates are set to 0
iv_gains <- iv_raw
iv_gains[which(iv_raw < 0 & iv_is_sig)] <- 0
dat$iv_changes_gains <- iv_gains * 100

# Losses: significant positive estimates are set to 0
iv_losses <- iv_raw
iv_losses[which(iv_raw > 0 & iv_is_sig)] <- 0
dat$iv_changes_losses <- iv_losses * 100

# Absolute and signed estimates
dat$iv_changes_gov_abs <- abs(iv_raw) * 100
dat$iv_changes_gov <- iv_raw * 100

# Create dummies for significant gains/losses.
# Each dummy is set to NA where the other one equals 1.
gains_flag <- ifelse(dat$iv_changes_gov > 0 & iv_is_sig, 1, 0)
losses_flag <- ifelse(dat$iv_changes_gov < 0 & iv_is_sig, 1, 0)
gains_flag[which(losses_flag == 1)] <- NA
losses_flag[which(gains_flag == 1)] <- NA
dat$iv_changes_gains_sig <- gains_flag
dat$iv_changes_losses_sig <- losses_flag

# Save the result below the project directory `wd`
save(dat, file = paste0(wd, "/data/dataforstage2_pid.Rda"))


